from transformers import pipeline
from langchain_community.llms import HuggingFacePipeline
import time
from langchain.prompts import PromptTemplate
# Build a local HuggingFace text-generation pipeline wrapped for LangChain.
# SECURITY NOTE(review): trust_remote_code=True executes Python shipped inside
# the model repository — acceptable only for trusted sources like Qwen here.
hf = HuggingFacePipeline.from_model_id(
    model_id="Qwen/Qwen1.5-14B-Chat",
    task="text-generation",
    device=0,  # first CUDA device; set to -1 to force CPU
    model_kwargs={"trust_remote_code": True},
    pipeline_kwargs={"max_new_tokens": 500, "temperature": 0.9, "do_sample": True},
)

# The prompt template passes the question through verbatim — no system text.
template = """{question}"""
prompt = PromptTemplate.from_template(template)
chain = prompt | hf  # LCEL pipe: prompt formatting -> model generation

# Run the same query three times and print each response plus its latency.
question = "介绍下白龙马?"
for _ in range(3):
    # perf_counter is monotonic and high-resolution — the right clock for
    # interval timing (time.time() can jump with system clock adjustments).
    start = time.perf_counter()
    print(chain.invoke({"question": question}))
    print(time.perf_counter() - start)




